df <- read_csv("./data/combinedstatshot.csv")
## Rows: 165 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): player, dunk_tot, dunk_pct, rim_tot, rim_pct, rim_asted, other2pt_...
## dbl (24): games, games_started, mp_per_g, fg_per_g, fga_per_g, fg_pct, fg2_p...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df <- df |> select(!c("games_started", "pf_per_g"))
colnames(df)
## [1] "player" "dunk_tot" "dunk_pct" "rim_tot"
## [5] "rim_pct" "rim_asted" "other2pt_tot" "other2pt_pct"
## [9] "other2pt_asted" "3pt_tot" "3pt_pct" "3pt_asted"
## [13] "games" "mp_per_g" "fg_per_g" "fga_per_g"
## [17] "fg_pct" "fg2_per_g" "fg2a_per_g" "fg2_pct"
## [21] "fg3_per_g" "fg3a_per_g" "fg3_pct" "ft_per_g"
## [25] "fta_per_g" "ft_pct" "orb_per_g" "drb_per_g"
## [29] "trb_per_g" "ast_per_g" "stl_per_g" "blk_per_g"
## [33] "tov_per_g" "pts_per_g"
path <- "~/BruinSports/data/draftdata.csv"
df_career_stats <- read_csv(path)
## Rows: 960 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): team_id, player, college_name, skip
## dbl (19): pick_overall, seasons, g, mp, pts, trb, ast, fg_pct, fg3_pct, ft_p...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
drop_cols <- c('team_id', 'skip', 'mp', 'pts', 'trb', 'ast')
df_career_stats <- df_career_stats |> select(!drop_cols)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(drop_cols)
##
## # Now:
## data %>% select(all_of(drop_cols))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
df_lot_picks <- df_career_stats |> filter(pick_overall < 15)
colnames(df_career_stats)
## [1] "pick_overall" "player" "college_name" "seasons" "g"
## [6] "fg_pct" "fg3_pct" "ft_pct" "mp_per_g" "pts_per_g"
## [11] "trb_per_g" "ast_per_g" "ws" "ws_per_48" "bpm"
## [16] "vorp" "year"
# plotting the distribution of points + rebounds + asts (PRA) per game
df_lot_picks <- df_lot_picks |> mutate(
pra_per_g = pts_per_g + trb_per_g + ast_per_g,
pick_overall = factor(pick_overall)
)
# Group by draft position, and getting the mean
draft_means <- df_lot_picks |> group_by(pick_overall) |>
summarize(avg_mpg = mean(mp_per_g),
avg_ppg = mean(pts_per_g),
avg_trbpg = mean(trb_per_g),
avg_apg = mean(ast_per_g),
avg_prapg = mean(pra_per_g))
print(draft_means, n = 14)
## # A tibble: 14 × 6
## pick_overall avg_mpg avg_ppg avg_trbpg avg_apg avg_prapg
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 31.2 18.8 6.52 4.32 29.7
## 2 2 26.9 13.7 4.96 2.88 21.6
## 3 3 30.0 17.6 6.3 3.52 27.4
## 4 4 27.5 12.8 5.39 2.48 20.7
## 5 5 26.0 12.7 4.92 3.55 21.2
## 6 6 23.2 10.2 4.35 2.26 16.8
## 7 7 27.6 13.4 4.88 2.77 21.0
## 8 8 21.1 8.82 3.37 1.77 14.0
## 9 9 24.5 10.9 4.55 2.52 18.0
## 10 10 23.5 10.4 3.61 2.31 16.3
## 11 11 21.1 10.2 3.69 2.16 16.0
## 12 12 24.6 10.6 4.47 2.23 17.3
## 13 13 22.6 10.9 3.99 2.23 17.1
## 14 14 20.3 8.78 3.75 1.3 13.8
df_lot_picks |> ggplot(aes(x = pick_overall, y = pra_per_g)) +
geom_boxplot() +
labs(x = "Draft Pick", y = "Points-Rebounds-Assists Per Game")
df2 <- left_join(df, df_lot_picks, by = c("player"), suffix = c("_college", "_nba"))
df2 <- df2 |>
separate_wider_delim(dunk_tot, delim = "-", names = c("dunk_made", "dunk_attempts"))
df2 <- df2 |>
separate_wider_delim(rim_tot, delim = "-", names = c("rim_made", "rim_attempts"))
df2 <- df2 |>
separate_wider_delim(other2pt_tot, delim = "-", names = c("other2pt_made", "other2pt_attempts"))
df2 <-df2 |>
mutate(across(c(dunk_made, dunk_attempts, rim_made, rim_attempts, other2pt_made, other2pt_attempts), as.numeric))
df2 <- df2 |> mutate(
vorp_per_g = vorp / g
)
df_top_players <- df2 |>
group_by(pick_overall) |>
summarize(across(c(pts_per_g_nba, trb_per_g_nba, ast_per_g_nba, pra_per_g, vorp_per_g), function(x) quantile(x, probs = 0.7)))
df_bottom_players <- df2 |>
group_by(pick_overall) |>
summarize(across(c(pts_per_g_nba, trb_per_g_nba, ast_per_g_nba, pra_per_g, vorp_per_g), function(x) quantile(x, probs = 0.3)))
# metric favors big men
# make the rebound percentile higher
is_not_bust <- function(pick_number, df_top_players = df_top_players, df2 = df2) {
ppg <- df_top_players |> pull(pts_per_g_nba)
apg <- df_top_players |> pull(ast_per_g_nba)
prapg <- df_top_players |> pull(pra_per_g)
df_top_rb <- df2 |>
group_by(pick_overall) |>
summarize(trb_per_g = quantile(trb_per_g_nba, probs = 0.8))
rpg <- df_top_rb |> pull(trb_per_g)
df_top_vorp <- df2 |>
group_by(pick_overall) |>
summarize(vorp_per_g = median(vorp_per_g))
vorppg <- df_top_vorp |> pull(vorp_per_g)
df <- df2 |> filter(pick_overall == pick_number) |>
filter(pra_per_g >= prapg[[pick_number]] | ast_per_g_nba >= apg[[pick_number]] | trb_per_g_nba >= rpg[[pick_number]] | pts_per_g_nba >= ppg[[pick_number]]) |>
filter(vorp_per_g >= vorppg[[pick_number]])
# must also have played at least most of their career in the nba
df <- df |> filter(seasons >= 4 / 5 * (2023-year))
return(df)
}
is_bust <- function(pick_number, df_bottom_players = df_bottom_players, df2 = df2) {
ppg <- df_bottom_players |> pull(pts_per_g_nba)
apg <- df_bottom_players |> pull(ast_per_g_nba)
prapg <- df_bottom_players |> pull(pra_per_g)
df_bottom_rb <- df2 |>
group_by(pick_overall) |>
summarize(trb_per_g = quantile(trb_per_g_nba, probs = 0.4))
rpg <- df_bottom_rb |> pull(trb_per_g)
df_bottom_vorp <- df2 |>
group_by(pick_overall) |>
summarize(vorp_per_g = quantile(vorp_per_g, probs = 0.3))
vorppg <- df_bottom_vorp |> pull(vorp_per_g)
# playing less than half the seasons since drafted makes you a bust
df <- df2 |> filter(pick_overall == pick_number) |>
filter((pra_per_g < prapg[[pick_number]] & ast_per_g_nba < apg[[pick_number]] & trb_per_g_nba < rpg[[pick_number]] & pts_per_g_nba < ppg[[pick_number]] & vorp_per_g < vorppg[[pick_number]]) | seasons < 1 / 2 * (2023-year))
return(df)
}
# first overall picks (not bust)
df_pick_1 <- is_not_bust(1, df_top_players, df2)
# first overall picks (bust)
df_pick_1_bust <- is_bust(1, df_bottom_players, df2)
df_pick_1
## # A tibble: 7 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 John Wall 33 36 91.7% 116 182 63.7%
## 2 Kyrie Irving 0 0 0% 26 39 66.7%
## 3 Anthony Davis 96 98 98.0% 152 174 87.4%
## 4 Karl-Anthony T… 22 24 91.7% 87 121 71.9%
## 5 Ben Simmons 56 61 91.8% 159 220 72.3%
## 6 Zion Williamson 72 79 91.1% 247 313 78.9%
## 7 Anthony Edwards 27 27 100.0% 89 129 69.0%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_1_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Anthony Bennett 53 58 91.4% 100 140 71.4%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# second overall
df_pick_2 <- is_not_bust(2, df_top_players, df2)
df_pick_2_bust <- is_bust(2, df_bottom_players, df2)
df_pick_2
## # A tibble: 5 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 D'Angelo Russe… 4 4 100.0% 70 110 63.6%
## 2 Brandon Ingram 17 17 100.0% 69 117 59.0%
## 3 Lonzo Ball 37 40 92.5% 94 120 78.3%
## 4 Ja Morant 28 31 90.3% 160 264 60.6%
## 5 Chet Holmgren 57 57 100.0% 105 125 84.0%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_2_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Derrick Willia… 56 60 93.3% 135 188 71.8%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# third overall
df_pick_3 <- is_not_bust(3, df_top_players, df2)
df_pick_3_bust <- is_bust(3, df_bottom_players, df2)
df_pick_3
## # A tibble: 4 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Bradley Beal 18 20 90.0% 89 137 65.0%
## 2 Joel Embiid 30 30 100.0% 80 99 80.8%
## 3 Jayson Tatum 18 21 85.7% 79 126 62.7%
## 4 Evan Mobley 63 66 95.5% 113 144 78.5%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_3_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Jahlil Okafor 64 67 95.5% 213 270 78.9%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# fourth overall
df_pick_4 <- is_not_bust(4, df_top_players, df2)
df_pick_4_bust <- is_bust(4, df_bottom_players, df2)
df_pick_4
## # A tibble: 4 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Aaron Gordon 54 56 96.4% 137 198 69.2%
## 2 Jaren Jackson … 31 31 100.0% 61 93 65.6%
## 3 Scottie Barnes 19 21 90.5% 61 89 68.5%
## 4 Keegan Murray 63 67 94.0% 196 277 70.8%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_4_bust
## # A tibble: 0 × 55
## # ℹ 55 variables: player <chr>, dunk_made <dbl>, dunk_attempts <dbl>,
## # dunk_pct <chr>, rim_made <dbl>, rim_attempts <dbl>, rim_pct <chr>,
## # rim_asted <chr>, other2pt_made <dbl>, other2pt_attempts <dbl>,
## # other2pt_pct <chr>, other2pt_asted <chr>, 3pt_tot <chr>, 3pt_pct <chr>,
## # 3pt_asted <chr>, games <dbl>, mp_per_g_college <dbl>, fg_per_g <dbl>,
## # fga_per_g <dbl>, fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>,
## # fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, …
# fifth overall
df_pick_5 <- is_not_bust(5, df_top_players, df2)
df_pick_5_bust <- is_bust(5, df_bottom_players, df2)
df_pick_5
## # A tibble: 3 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 DeMarcus Cousi… 53 57 93.0% 144 189 76.2%
## 2 De'Aaron Fox 20 21 95.2% 131 203 64.5%
## 3 Trae Young 0 0 0% 105 201 52.2%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_5_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Thomas Robinson 70 83 84.3% 169 262 64.5%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# sixth overall
df_pick_6 <- is_not_bust(6, df_top_players, df2)
df_pick_6_bust <- is_bust(6, df_bottom_players, df2)
df_pick_6
## # A tibble: 5 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Damian Lillard 13 17 76.5% 98 169 58.0%
## 2 Nerlens Noel 48 50 96.0% 76 99 76.8%
## 3 Marcus Smart 16 18 88.9% 78 110 70.9%
## 4 Buddy Hield 18 22 81.8% 119 178 66.9%
## 5 Onyeka Okongwu 58 61 95.1% 135 186 72.6%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_6_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Ekpe Udoh 30 32 93.8% 78 109 71.6%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# seventh overall
df_pick_7 <- is_not_bust(7, df_top_players, df2)
df_pick_7_bust <- is_bust(7, df_bottom_players, df2)
df_pick_7
## # A tibble: 3 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Julius Randle 37 40 92.5% 132 197 67.0%
## 2 Jamal Murray 18 19 94.7% 77 111 69.4%
## 3 Lauri Markkanen 20 24 83.3% 65 100 65.0%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_7_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Ben McLemore 44 45 97.8% 90 126 71.4%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# eighth overall
df_pick_8 <- is_not_bust(8, df_top_players, df2)
df_pick_8_bust <- is_bust(8, df_bottom_players, df2)
df_pick_8
## # A tibble: 3 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Al-Farouq Aminu 46 48 95.8% 112 173 64.7%
## 2 Kentavious Cal… 15 16 93.8% 63 94 67.0%
## 3 Franz Wagner 11 11 100.0% 63 93 67.7%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_8_bust
## # A tibble: 0 × 55
## # ℹ 55 variables: player <chr>, dunk_made <dbl>, dunk_attempts <dbl>,
## # dunk_pct <chr>, rim_made <dbl>, rim_attempts <dbl>, rim_pct <chr>,
## # rim_asted <chr>, other2pt_made <dbl>, other2pt_attempts <dbl>,
## # other2pt_pct <chr>, other2pt_asted <chr>, 3pt_tot <chr>, 3pt_pct <chr>,
## # 3pt_asted <chr>, games <dbl>, mp_per_g_college <dbl>, fg_per_g <dbl>,
## # fga_per_g <dbl>, fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>,
## # fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, …
# ninth overall
df_pick_9 <- is_not_bust(9, df_top_players, df2)
df_pick_9_bust <- is_bust(9, df_bottom_players, df2)
df_pick_9
## # A tibble: 5 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Gordon Hayward 19 20 95.0% 89 128 69.5%
## 2 Kemba Walker 3 3 100.0% 115 196 58.7%
## 3 Andre Drummond 80 89 89.9% 130 185 70.3%
## 4 Trey Burke 9 9 100.0% 67 105 63.8%
## 5 Jakob Poeltl 32 34 94.1% 199 284 70.1%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_9_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Kevin Knox 18 20 90.0% 65 99 65.7%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# tenth overall
df_pick_10 <- is_not_bust(10, df_top_players, df2)
df_pick_10_bust <- is_bust(10, df_bottom_players, df2)
df_pick_10
## # A tibble: 5 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Paul George 18 22 81.8% 70 106 66.0%
## 2 CJ McCollum 3 3 100.0% 34 63 54.0%
## 3 Elfrid Payton 21 24 87.5% 169 247 68.4%
## 4 Mikal Bridges 35 42 83.3% 109 161 67.7%
## 5 Jalen Smith 49 52 94.2% 114 158 72.2%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_10_bust
## # A tibble: 2 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Ziaire Williams 10 11 90.9% 26 49 53.1%
## 2 Johnny Davis 16 19 84.2% 89 143 62.2%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# eleventh overall
df_pick_11 <- is_not_bust(11, df_top_players, df2)
df_pick_11_bust <- is_bust(11, df_bottom_players, df2)
df_pick_11
## # A tibble: 4 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Klay Thompson 8 8 100.0% 66 110 60.0%
## 2 Myles Turner 11 13 84.6% 40 54 74.1%
## 3 Domantas Sabon… 22 24 91.7% 157 214 73.4%
## 4 Shai Gilgeous-… 11 11 100.0% 108 182 59.3%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_11_bust
## # A tibble: 2 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 James Bouknight 12 12 100.0% 52 79 65.8%
## 2 Jett Howard 6 6 100.0% 29 47 61.7%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# twelveth overall
df_pick_12 <- is_not_bust(12, df_top_players, df2)
df_pick_12_bust <- is_bust(12, df_bottom_players, df2)
df_pick_12
## # A tibble: 5 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Steven Adams 29 33 87.9% 85 129 65.9%
## 2 Miles Bridges 30 35 85.7% 84 128 65.6%
## 3 Tyrese Halibur… 7 8 87.5% 46 62 74.2%
## 4 Jalen Williams 25 27 92.6% 124 186 66.7%
## 5 Dereck Lively … 54 55 98.2% 74 96 77.1%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_12_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Xavier Henry 17 17 100.0% 60 90 66.7%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# thirteenth overall
df_pick_13 <- is_not_bust(13, df_top_players, df2)
df_pick_13_bust <- is_bust(13, df_bottom_players, df2)
df_pick_13
## # A tibble: 7 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Ed Davis 26 27 96.3% 42 50 84.0%
## 2 Kelly Olynyk 25 28 89.3% 152 212 71.7%
## 3 Zach LaVine 21 25 84.0% 51 90 56.7%
## 4 Devin Booker 8 9 88.9% 42 59 71.2%
## 5 Donovan Mitche… 9 13 69.2% 64 116 55.2%
## 6 Tyler Herro 4 5 80.0% 56 84 66.7%
## 7 Jalen Duren 70 76 92.1% 111 152 73.0%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_13_bust
## # A tibble: 2 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Kendall Marsha… 0 0 0% 35 53 66.0%
## 2 Jerome Robinson 12 13 92.3% 98 157 62.4%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# fourteenth overall
df_pick_14 <- is_not_bust(14, df_top_players, df2)
df_pick_14_bust <- is_bust(14, df_bottom_players, df2)
df_pick_14
## # A tibble: 4 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Marcus Morris 31 33 93.9% 114 147 77.6%
## 2 T.J. Warren 37 37 100.0% 192 251 76.5%
## 3 Cameron Payne 3 3 100.0% 53 87 60.9%
## 4 Bam Adebayo 99 105 94.3% 138 185 74.6%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_14_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Romeo Langford 10 14 71.4% 91 138 65.9%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# combining dataframes
df_good <- bind_rows(df_pick_1, df_pick_2, df_pick_3, df_pick_4, df_pick_5, df_pick_6, df_pick_7, df_pick_8, df_pick_9,
df_pick_10, df_pick_11, df_pick_12, df_pick_13, df_pick_14)
df_busts <- bind_rows(df_pick_1_bust, df_pick_2_bust, df_pick_3_bust, df_pick_4_bust, df_pick_5_bust, df_pick_6_bust, df_pick_7_bust, df_pick_8_bust, df_pick_9_bust, df_pick_10_bust, df_pick_11_bust, df_pick_12_bust, df_pick_13_bust, df_pick_14_bust)
print(df_good, n = 20)
## # A tibble: 64 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 John Wall 33 36 91.7% 116 182 63.7%
## 2 Kyrie Irving 0 0 0% 26 39 66.7%
## 3 Anthony Davis 96 98 98.0% 152 174 87.4%
## 4 Karl-Anthony … 22 24 91.7% 87 121 71.9%
## 5 Ben Simmons 56 61 91.8% 159 220 72.3%
## 6 Zion Williams… 72 79 91.1% 247 313 78.9%
## 7 Anthony Edwar… 27 27 100.0% 89 129 69.0%
## 8 D'Angelo Russ… 4 4 100.0% 70 110 63.6%
## 9 Brandon Ingram 17 17 100.0% 69 117 59.0%
## 10 Lonzo Ball 37 40 92.5% 94 120 78.3%
## 11 Ja Morant 28 31 90.3% 160 264 60.6%
## 12 Chet Holmgren 57 57 100.0% 105 125 84.0%
## 13 Bradley Beal 18 20 90.0% 89 137 65.0%
## 14 Joel Embiid 30 30 100.0% 80 99 80.8%
## 15 Jayson Tatum 18 21 85.7% 79 126 62.7%
## 16 Evan Mobley 63 66 95.5% 113 144 78.5%
## 17 Aaron Gordon 54 56 96.4% 137 198 69.2%
## 18 Jaren Jackson… 31 31 100.0% 61 93 65.6%
## 19 Scottie Barnes 19 21 90.5% 61 89 68.5%
## 20 Keegan Murray 63 67 94.0% 196 277 70.8%
## # ℹ 44 more rows
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>, …
df_good |> pull(player)
## [1] "John Wall" "Kyrie Irving"
## [3] "Anthony Davis" "Karl-Anthony Towns"
## [5] "Ben Simmons" "Zion Williamson"
## [7] "Anthony Edwards" "D'Angelo Russell"
## [9] "Brandon Ingram" "Lonzo Ball"
## [11] "Ja Morant" "Chet Holmgren"
## [13] "Bradley Beal" "Joel Embiid"
## [15] "Jayson Tatum" "Evan Mobley"
## [17] "Aaron Gordon" "Jaren Jackson Jr."
## [19] "Scottie Barnes" "Keegan Murray"
## [21] "DeMarcus Cousins" "De'Aaron Fox"
## [23] "Trae Young" "Damian Lillard"
## [25] "Nerlens Noel" "Marcus Smart"
## [27] "Buddy Hield" "Onyeka Okongwu"
## [29] "Julius Randle" "Jamal Murray"
## [31] "Lauri Markkanen" "Al-Farouq Aminu"
## [33] "Kentavious Caldwell-Pope" "Franz Wagner"
## [35] "Gordon Hayward" "Kemba Walker"
## [37] "Andre Drummond" "Trey Burke"
## [39] "Jakob Poeltl" "Paul George"
## [41] "CJ McCollum" "Elfrid Payton"
## [43] "Mikal Bridges" "Jalen Smith"
## [45] "Klay Thompson" "Myles Turner"
## [47] "Domantas Sabonis" "Shai Gilgeous-Alexander"
## [49] "Steven Adams" "Miles Bridges"
## [51] "Tyrese Haliburton" "Jalen Williams"
## [53] "Dereck Lively II" "Ed Davis"
## [55] "Kelly Olynyk" "Zach LaVine"
## [57] "Devin Booker" "Donovan Mitchell"
## [59] "Tyler Herro" "Jalen Duren"
## [61] "Marcus Morris" "T.J. Warren"
## [63] "Cameron Payne" "Bam Adebayo"
good_list <- df_good |> pull(player)
df_busts |> pull(player)
## [1] "Anthony Bennett" "Derrick Williams" "Jahlil Okafor" "Thomas Robinson"
## [5] "Ekpe Udoh" "Ben McLemore" "Kevin Knox" "Ziaire Williams"
## [9] "Johnny Davis" "James Bouknight" "Jett Howard" "Xavier Henry"
## [13] "Kendall Marshall" "Jerome Robinson" "Romeo Langford"
bust_list <- df_busts |> pull(player)
plot_good <- ggplot(df_good, aes(x = fg2_per_g, y = fg3_per_g)) +
geom_point(color = "green", size = 4, alpha = 0.5) +
geom_label_repel(data = df_good, aes(label = player), size = 1.5, max.overlaps = 20) +
labs(title = "CBB Shot Selection for Good Value NBA Lottery Picks", x = "2PT Field Goal Makes per game", y = "3PT Field Goal Makes per game") +
theme_bw()
plot_busts <- ggplot(df_busts, aes(x = fg2_per_g, y = fg3_per_g, label = player)) +
geom_point(color = "red", size = 4, alpha = 0.5) +
geom_label_repel(size = 3) +
labs(title = "CBB Shot Selection for NBA Lottery Busts", x = "2PT Field Goal Makes per game", y = "3PT Field Goal Makes per game") +
theme_bw()
plot_combined <- ggplot(df2, aes(x = fg2_per_g, y = fg3_per_g)) +
geom_point(aes(color = "Average value"), size = 4, alpha = 0.2) +
geom_point(data = df_busts, aes(color = "Bad value"), size = 4, alpha = 0.5) +
geom_point(data = df_good, aes(color = "Good value"), size = 4, alpha = 0.5) +
labs(title = "CBB Shot Selection for NBA Lottery Picks",
x = "2PT Field Goal Makes per game", y = "3PT Field Goal Makes per game", color = "Value") +
scale_color_manual(values = c("Bad value" = "red", "Good value" = "green", "Average value" = "grey")) +
theme_bw()
plot_combined
plot_busts
plot_good
library(corrr)
library(ggcorrplot)
library(FactoMineR)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
colnames(df2)
## [1] "player" "dunk_made" "dunk_attempts"
## [4] "dunk_pct" "rim_made" "rim_attempts"
## [7] "rim_pct" "rim_asted" "other2pt_made"
## [10] "other2pt_attempts" "other2pt_pct" "other2pt_asted"
## [13] "3pt_tot" "3pt_pct" "3pt_asted"
## [16] "games" "mp_per_g_college" "fg_per_g"
## [19] "fga_per_g" "fg_pct_college" "fg2_per_g"
## [22] "fg2a_per_g" "fg2_pct" "fg3_per_g"
## [25] "fg3a_per_g" "fg3_pct_college" "ft_per_g"
## [28] "fta_per_g" "ft_pct_college" "orb_per_g"
## [31] "drb_per_g" "trb_per_g_college" "ast_per_g_college"
## [34] "stl_per_g" "blk_per_g" "tov_per_g"
## [37] "pts_per_g_college" "pick_overall" "college_name"
## [40] "seasons" "g" "fg_pct_nba"
## [43] "fg3_pct_nba" "ft_pct_nba" "mp_per_g_nba"
## [46] "pts_per_g_nba" "trb_per_g_nba" "ast_per_g_nba"
## [49] "ws" "ws_per_48" "bpm"
## [52] "vorp" "year" "pra_per_g"
## [55] "vorp_per_g"
df3 <- column_to_rownames(df2, var = "player")
df_cbb <- df3 |> select(
dunk_made, dunk_attempts, dunk_pct, rim_made, rim_attempts, rim_pct, rim_asted,
other2pt_made, other2pt_attempts, other2pt_pct, other2pt_asted, fg2_pct, fg3_per_g, fg3a_per_g,
`3pt_asted`, games, ft_per_g, fta_per_g, ast_per_g_college, orb_per_g, drb_per_g,
stl_per_g, blk_per_g, tov_per_g, pts_per_g_college
) |> rename(fg3_asted = `3pt_asted`) |>
mutate(dunk_pct = parse_number(dunk_pct) / 100, # changing to percentages
rim_pct = parse_number(rim_pct) / 100,
rim_asted = parse_number(rim_asted) / 100,
other2pt_pct = parse_number(other2pt_pct) / 100,
other2pt_asted = parse_number(other2pt_asted) / 100,
fg3_asted = parse_number(fg3_asted) / 100,
fg3_pct_per_g = fg3_per_g / fg3a_per_g)
df_cbb <- df_cbb |>
mutate(fg3_pct_per_g = coalesce(fg3_pct_per_g, 0)) |>
relocate(fg3_pct_per_g, .after = fg3_asted)
to_per_game <- function(x, games) {
x <- x / games
return(x)
}
df_cbb <- df_cbb |> # making everything per game
mutate(across(c(dunk_made, dunk_attempts, rim_made, rim_attempts, other2pt_made,
other2pt_attempts), function(x) to_per_game(x, games)))
colSums(is.na(df_cbb))
## dunk_made dunk_attempts dunk_pct rim_made
## 0 0 0 0
## rim_attempts rim_pct rim_asted other2pt_made
## 0 0 0 0
## other2pt_attempts other2pt_pct other2pt_asted fg2_pct
## 0 0 0 0
## fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g
## 0 0 0 0
## games ft_per_g fta_per_g ast_per_g_college
## 0 0 0 0
## orb_per_g drb_per_g stl_per_g blk_per_g
## 0 0 0 0
## tov_per_g pts_per_g_college
## 0 0
Following this guide: https://www.datacamp.com/tutorial/pca-analysis-r
df_cbb_scaled <- as_tibble(scale(df_cbb |> select(-games)))
df_cbb_scaled
## # A tibble: 165 × 25
## dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct rim_asted
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.338 0.359 0.125 0.886 1.15 -0.582 -0.936
## 2 -0.789 -0.813 0.332 -0.925 -1.11 1.48 -1.47
## 3 1.25 1.23 0.325 1.46 1.11 1.46 0.662
## 4 0.599 0.655 0.0208 0.478 0.382 0.591 0.733
## 5 1.13 1.15 0.215 1.44 1.18 1.16 0.633
## 6 0.246 0.233 0.270 0.0605 -0.0262 0.521 -0.0688
## 7 -0.560 -0.589 0.387 0.233 0.367 -0.443 0.0432
## 8 0.102 0.0619 0.408 -0.246 -0.174 -0.443 0.0668
## 9 -0.635 -0.661 0.353 -0.687 -0.754 0.228 0.615
## 10 -0.619 -0.573 -0.560 -0.839 -0.861 -0.261 -0.623
## # ℹ 155 more rows
## # ℹ 18 more variables: other2pt_made <dbl>, other2pt_attempts <dbl>,
## # other2pt_pct <dbl>, other2pt_asted <dbl>, fg2_pct <dbl>, fg3_per_g <dbl>,
## # fg3a_per_g <dbl>, fg3_asted <dbl>, fg3_pct_per_g <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>,
## # stl_per_g <dbl>, blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>
# df_cbb_scaled$player <- df_cbb$player
# df_cbb_scaled <- df_cbb_scaled |> relocate(player, .before = dunk_made)
corr_matrix <- cor(df_cbb_scaled)
colnames(corr_matrix)
## [1] "dunk_made" "dunk_attempts" "dunk_pct"
## [4] "rim_made" "rim_attempts" "rim_pct"
## [7] "rim_asted" "other2pt_made" "other2pt_attempts"
## [10] "other2pt_pct" "other2pt_asted" "fg2_pct"
## [13] "fg3_per_g" "fg3a_per_g" "fg3_asted"
## [16] "fg3_pct_per_g" "ft_per_g" "fta_per_g"
## [19] "ast_per_g_college" "orb_per_g" "drb_per_g"
## [22] "stl_per_g" "blk_per_g" "tov_per_g"
## [25] "pts_per_g_college"
ggcorrplot(corr_matrix, method = "square")
ggcorrplot(corr_matrix, method = "square", hc.order = TRUE,
type = "lower", tl.cex = 7, title = "Correlations between different college statistics")
Kmeans Clustering: https://medium.com/@zullinira23/implementation-of-principal-component-analysis-pca-on-k-means-clustering-in-r-794f03ec15f
df_cbb.sample <- df_cbb |> sample_frac(1, replace = FALSE)
head(df_cbb.sample)
## dunk_made dunk_attempts dunk_pct rim_made rim_attempts
## Lonzo Ball 1.02777778 1.11111111 0.925 2.611111 3.333333
## Meyers Leonard 0.78461538 0.86153846 0.911 1.676923 2.215385
## Wendell Carter Jr. 1.51351351 1.56756757 0.966 3.243243 4.729730
## Derrick Williams 0.81159420 0.86956522 0.933 1.956522 2.724638
## Davion Mitchell 0.03333333 0.03333333 1.000 1.166667 1.850000
## Moses Moody 0.31250000 0.34375000 0.909 1.937500 3.437500
## rim_pct rim_asted other2pt_made other2pt_attempts
## Lonzo Ball 0.783 0.511 0.4166667 0.8055556
## Meyers Leonard 0.757 0.798 0.9230769 2.0923077
## Wendell Carter Jr. 0.686 0.608 1.0810811 2.6216216
## Derrick Williams 0.718 0.585 0.7101449 1.7101449
## Davion Mitchell 0.631 0.186 0.4166667 0.9500000
## Moses Moody 0.564 0.371 1.4062500 3.5625000
## other2pt_pct other2pt_asted fg2_pct fg3_per_g fg3a_per_g
## Lonzo Ball 0.517 0.200 0.732 2.2 5.4
## Meyers Leonard 0.441 0.500 0.585 0.0 0.2
## Wendell Carter Jr. 0.412 0.500 0.586 0.5 1.2
## Derrick Williams 0.415 0.388 0.598 0.7 1.3
## Davion Mitchell 0.439 0.080 0.519 1.6 4.1
## Moses Moody 0.395 0.311 0.478 1.8 5.1
## fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
## Lonzo Ball 0.738 0.4074074 36 1.8 2.7
## Meyers Leonard 1.000 0.0000000 65 1.6 2.2
## Wendell Carter Jr. 0.947 0.4166667 37 3.4 4.5
## Derrick Williams 0.929 0.5384615 69 5.9 8.2
## Davion Mitchell 0.603 0.3902439 60 1.6 2.4
## Moses Moody 0.897 0.3529412 32 4.7 5.8
## ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g
## Lonzo Ball 7.6 0.9 5.1 1.8 0.8
## Meyers Leonard 0.7 1.3 3.4 0.3 1.1
## Wendell Carter Jr. 2.0 2.9 6.1 0.8 2.1
## Derrick Williams 0.9 2.5 5.2 0.8 0.7
## Davion Mitchell 4.7 0.4 2.3 1.7 0.4
## Moses Moody 1.6 2.0 3.8 1.0 0.7
## tov_per_g pts_per_g_college
## Lonzo Ball 2.5 14.6
## Meyers Leonard 1.4 7.7
## Wendell Carter Jr. 2.0 13.5
## Derrick Williams 2.3 17.8
## Davion Mitchell 2.3 12.0
## Moses Moody 1.6 16.8
df_cbb.pca <- PCA(df_cbb.sample |> select(-games), scale.unit = TRUE, graph = FALSE)
fviz_eig(df_cbb.pca, addlabels = TRUE, main = "Statistics Represented in Lower Dimensional Components")
fviz_pca_var(df_cbb.pca, col.var = "cos2", gradient.cols = c("lightblue", "black"), repel = TRUE)
var <- get_pca_var(df_cbb.pca)
fviz_cos2(df_cbb.pca, choice = "var", axes = 1:2) +
labs(title = "Quality of Representation to PCA Dimensions 1 and 2")
fviz_pca_ind(df_cbb.pca, repel = TRUE, labelsize = 1) +
labs(title = "NBA Lottery Picks on PCA Dimensions 1 and 2")
summary(df_cbb.pca)
##
## Call:
## PCA(X = select(df_cbb.sample, -games), scale.unit = TRUE, graph = FALSE)
##
##
## Eigenvalues
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7
## Variance 7.916 5.341 2.094 1.495 1.261 1.201 1.002
## % of var. 31.663 21.364 8.376 5.978 5.045 4.803 4.008
## Cumulative % of var. 31.663 53.027 61.403 67.381 72.426 77.229 81.237
## Dim.8 Dim.9 Dim.10 Dim.11 Dim.12 Dim.13 Dim.14
## Variance 0.967 0.751 0.556 0.446 0.420 0.327 0.267
## % of var. 3.867 3.006 2.225 1.786 1.679 1.310 1.067
## Cumulative % of var. 85.104 88.110 90.335 92.121 93.800 95.110 96.177
## Dim.15 Dim.16 Dim.17 Dim.18 Dim.19 Dim.20 Dim.21
## Variance 0.258 0.213 0.164 0.141 0.072 0.062 0.024
## % of var. 1.032 0.850 0.655 0.563 0.286 0.249 0.097
## Cumulative % of var. 97.209 98.059 98.714 99.277 99.564 99.813 99.910
## Dim.22 Dim.23 Dim.24 Dim.25
## Variance 0.011 0.006 0.004 0.002
## % of var. 0.046 0.023 0.014 0.007
## Cumulative % of var. 99.955 99.979 99.993 100.000
##
## Individuals (the 10 first)
## Dist Dim.1 ctr cos2 Dim.2 ctr cos2
## Lonzo Ball | 6.220 | -0.362 0.010 0.003 | -0.139 0.002 0.001 |
## Meyers Leonard | 5.490 | 2.912 0.649 0.281 | -3.535 1.418 0.415 |
## Wendell Carter Jr. | 4.044 | 3.305 0.836 0.668 | 0.783 0.070 0.038 |
## Derrick Williams | 4.779 | 1.068 0.087 0.050 | 1.313 0.196 0.075 |
## Davion Mitchell | 4.911 | -3.464 0.919 0.497 | -1.853 0.389 0.142 |
## Moses Moody | 3.425 | -1.599 0.196 0.218 | 1.186 0.160 0.120 |
## Noah Vonleh | 3.293 | 1.038 0.083 0.099 | 0.048 0.000 0.000 |
## Markieff Morris | 4.397 | 1.608 0.198 0.134 | -3.641 1.505 0.686 |
## Joshua Primo | 5.795 | -1.515 0.176 0.068 | -4.202 2.003 0.526 |
## Josh Jackson | 4.144 | 1.320 0.133 0.101 | 3.011 1.029 0.528 |
## Dim.3 ctr cos2
## Lonzo Ball -1.249 0.451 0.040 |
## Meyers Leonard 0.266 0.021 0.002 |
## Wendell Carter Jr. 0.907 0.238 0.050 |
## Derrick Williams 0.916 0.243 0.037 |
## Davion Mitchell -1.563 0.707 0.101 |
## Moses Moody 1.641 0.780 0.230 |
## Noah Vonleh 0.352 0.036 0.011 |
## Markieff Morris 0.222 0.014 0.003 |
## Joshua Primo -0.484 0.068 0.007 |
## Josh Jackson 0.003 0.000 0.000 |
##
## Variables (the 10 first)
## Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3
## dunk_made | 0.835 8.802 0.697 | 0.336 2.117 0.113 | -0.025
## dunk_attempts | 0.829 8.675 0.687 | 0.338 2.140 0.114 | -0.025
## dunk_pct | 0.334 1.412 0.112 | -0.120 0.269 0.014 | 0.228
## rim_made | 0.584 4.305 0.341 | 0.672 8.468 0.452 | -0.088
## rim_attempts | 0.451 2.571 0.203 | 0.740 10.243 0.547 | -0.093
## rim_pct | 0.724 6.625 0.524 | -0.148 0.409 0.022 | -0.001
## rim_asted | 0.767 7.441 0.589 | -0.354 2.351 0.126 | 0.182
## other2pt_made | 0.106 0.141 0.011 | 0.601 6.772 0.362 | 0.560
## other2pt_attempts | 0.102 0.131 0.010 | 0.651 7.924 0.423 | 0.484
## other2pt_pct | 0.007 0.001 0.000 | -0.045 0.039 0.002 | 0.359
## ctr cos2
## dunk_made 0.029 0.001 |
## dunk_attempts 0.030 0.001 |
## dunk_pct 2.485 0.052 |
## rim_made 0.370 0.008 |
## rim_attempts 0.410 0.009 |
## rim_pct 0.000 0.000 |
## rim_asted 1.578 0.033 |
## other2pt_made 14.964 0.313 |
## other2pt_attempts 11.203 0.235 |
## other2pt_pct 6.140 0.129 |
pca2 <- prcomp(df_cbb.sample, center = TRUE, scale = TRUE)
summary(pca2)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.8233 2.3577 1.45399 1.29215 1.18882 1.10678 1.00292
## Proportion of Variance 0.3066 0.2138 0.08131 0.06422 0.05436 0.04711 0.03869
## Cumulative Proportion 0.3066 0.5204 0.60167 0.66589 0.72025 0.76736 0.80605
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.98328 0.87000 0.75739 0.66985 0.65034 0.64385 0.56902
## Proportion of Variance 0.03719 0.02911 0.02206 0.01726 0.01627 0.01594 0.01245
## Cumulative Proportion 0.84323 0.87234 0.89441 0.91166 0.92793 0.94388 0.95633
## PC15 PC16 PC17 PC18 PC19 PC20 PC21
## Standard deviation 0.51470 0.49095 0.41133 0.40183 0.37408 0.26745 0.2038
## Proportion of Variance 0.01019 0.00927 0.00651 0.00621 0.00538 0.00275 0.0016
## Cumulative Proportion 0.96652 0.97579 0.98230 0.98851 0.99389 0.99664 0.9982
## PC22 PC23 PC24 PC25 PC26
## Standard deviation 0.1527 0.10681 0.07652 0.05949 0.04172
## Proportion of Variance 0.0009 0.00044 0.00023 0.00014 0.00007
## Cumulative Proportion 0.9991 0.99957 0.99980 0.99993 1.00000
df_cluster <- as.data.frame(-pca2$x[, 1:2])
df_cluster
## PC1 PC2
## Lonzo Ball -0.30972840 0.0007053551
## Meyers Leonard 2.74024056 3.6508169349
## Wendell Carter Jr. 3.36498747 -0.7216499359
## Derrick Williams 1.03830203 -1.0304451116
## Davion Mitchell -3.51406172 1.6468468321
## Moses Moody -1.46960252 -1.4190061757
## Noah Vonleh 1.11130925 -0.1751834144
## Markieff Morris 1.29699896 4.0373945397
## Joshua Primo -1.55551858 3.7707023835
## Josh Jackson 1.48957168 -3.0475464620
## Willie Cauley-Stein 2.65175786 4.0213075524
## De'Aaron Fox -0.87066774 -3.3180112387
## Brandon Miller -2.00076132 -1.9889065138
## Coby White -3.35981962 -0.9338304491
## Miles Bridges -0.51099544 0.8957909493
## CJ McCollum -3.90112719 -0.5500261983
## Nerlens Noel 5.45383914 -0.7991793564
## Steven Adams 3.66839570 2.7680962536
## Denzel Valentine -3.08887719 3.2154062834
## Cody Zeller 2.02496946 -0.5680230150
## Zion Williamson 5.07447989 -4.6504795504
## Cameron Johnson -1.47106716 2.1704255345
## Keegan Murray 1.59473589 0.7127863270
## Kendall Marshall -4.60320232 1.8524037126
## Collin Sexton -2.52183153 -3.9991906129
## Kyrie Irving -3.77122159 -3.0033308087
## Cade Cunningham -2.90919016 -4.4345298643
## Stanley Johnson -1.16931864 -1.1296582673
## Malik Monk -2.28778612 -1.6797126865
## Anthony Davis 6.73630616 -1.2629548886
## Markelle Fultz -2.05054203 -6.2356705555
## Julius Randle 2.20418418 -3.0357101236
## Jett Howard -2.91653599 1.5725624278
## Mo Bamba 5.12168936 -0.6472841174
## Rui Hachimura 0.89990828 2.6695528608
## Jalen Duren 5.98165537 -0.5776256581
## Jabari Smith Jr. -1.62886867 -1.4477810399
## Jaylen Brown -0.86593793 -2.2952001302
## Michael Kidd-Gilchrist 1.49000302 -0.5168492854
## Anthony Black -1.20227004 -1.7148572319
## Zach LaVine -1.60422247 2.6481909219
## Jimmer Fredette -4.12885438 0.1532925420
## Jordan Hawkins -3.13721836 2.6918367917
## Wes Johnson 1.50316126 -1.3047269862
## Marvin Bagley III 5.65427831 -4.5448753922
## Aaron Gordon 2.71643469 -0.5168844660
## Romeo Langford -0.73466257 -2.2444498661
## RJ Barrett -0.08315247 -5.0528618008
## Cason Wallace -2.34104930 0.2152815400
## Patrick Patterson 2.53963602 2.1109010563
## Evan Turner -1.49673522 -0.4578015058
## Ekpe Udoh 2.87709283 -2.0867754797
## Jabari Parker 2.40112005 -3.8466488862
## Donovan Mitchell -3.15753551 1.8102604526
## Dereck Lively II 5.06532407 4.9338352381
## Buddy Hield -3.54777620 1.4535258538
## Derrick Favors 5.01881131 -0.5372658952
## Ben McLemore 0.24239908 -0.1870327074
## Franz Wagner -1.24385093 2.2215940450
## Onyeka Okongwu 5.79890613 -2.6047417619
## Austin Rivers -2.95329868 -1.3791937733
## Myles Turner 1.86401362 1.8030642874
## D'Angelo Russell -2.98270993 -2.6314533501
## Harrison Barnes -1.11071750 0.4675508975
## Isaac Okoro 0.42993503 -0.2975331821
## Patrick Williams -0.12807727 1.3341060184
## Mikal Bridges -1.20162590 3.5903451628
## Shai Gilgeous-Alexander -2.04551424 -2.3234644917
## Jarrett Culver -1.51221150 0.0681535683
## Jayson Tatum -0.78010288 -2.6444271533
## Brandon Knight -3.54151495 -2.0093567331
## Lauri Markkanen -0.18748839 0.1368727294
## Domantas Sabonis 1.67699707 1.2190984214
## Andrew Wiggins 0.36542307 -2.6025198229
## Jamal Murray -2.37633667 -1.7128111826
## Paul George -2.10747433 0.1876406410
## Xavier Henry -1.24518982 0.8915202286
## Deandre Ayton 6.32878662 -3.9424880892
## Johnny Davis -1.33669032 0.1478916084
## Gradey Dick -1.44740283 1.2927758040
## Cameron Payne -4.48285978 -1.3236954751
## Ziaire Williams -1.78971475 0.2486334187
## Otto Porter Jr. 0.04885497 1.8356037020
## Scottie Barnes -0.41005879 0.0002825293
## Ochai Agbaji -1.65972813 3.1665060860
## Jeremy Lamb -0.50899046 2.6527408299
## Jerome Robinson -3.05834301 0.2586574385
## T.J. Warren 1.08912829 -0.3669642309
## Jaden Ivey -1.86477689 0.0773337674
## Ben Simmons 3.55778734 -6.3959006442
## Damian Lillard -4.29694802 -0.3843340708
## P.J. Washington 0.33949853 1.0835362435
## Trey Burke -3.60982350 -0.5620071724
## John Wall -1.73494610 -3.9455798842
## Luke Kennard -2.76829877 0.9772815337
## Jalen Suggs -1.57680464 -1.3826079667
## Alex Len 3.60602390 2.2253867363
## De'Andre Hunter -0.77514281 2.0904369415
## Dennis Smith Jr. -2.43263222 -4.0080211081
## Brandon Ingram -1.66287033 -1.8607679236
## Marcus Smart -2.59113529 -1.9709358714
## Aaron Nesmith -3.28346405 1.6855751483
## Doug McDermott -1.01124840 1.4023518631
## Marcus Morris 0.73895771 2.6323130400
## Jonathan Isaac 1.48927680 0.4512240082
## Justise Winslow -0.36808543 -0.0140660045
## Evan Mobley 4.26687599 -2.6519044433
## Bam Adebayo 5.99686501 -1.1625539440
## Shabazz Muhammad 0.36507564 -2.1338042145
## Cole Aldrich 2.78307370 3.9300845772
## Jaren Jackson Jr. 1.49089242 1.7016335898
## Anthony Bennett 3.08683149 -0.9658417837
## Victor Oladipo -0.63903222 2.5888631078
## Kris Dunn -2.69748838 0.0951825819
## John Henson 2.97155356 3.1904829161
## Karl-Anthony Towns 2.08103954 1.1196065654
## Klay Thompson -3.84131304 0.2800845197
## Nik Stauskas -3.13687982 1.8802991525
## Kelly Olynyk 0.73492325 3.8811181762
## Jakob Poeltl 3.07463033 1.0055357287
## Devin Vassell -0.72773722 4.1046446591
## Marquese Chriss 2.77746275 -0.4521718014
## Cam Reddish -4.14985479 0.0842589880
## Kemba Walker -3.18777727 -0.5896508288
## Jalen Williams -1.80913049 1.6958213307
## Al-Farouq Aminu 1.02295866 -0.6557701582
## Kevin Knox -0.63604976 -0.7501807107
## Anthony Edwards -1.40251473 -2.6178813170
## Greg Monroe 0.55772278 -0.6192425231
## Tristan Thompson 4.60556911 -2.1556621458
## James Bouknight -1.52674532 0.8368198759
## Trae Young -6.66738911 -7.9875515113
## Terrence Ross -1.18817176 2.5860223513
## Jalen Smith 2.10897015 1.8201712718
## Jarace Walker 1.30148259 1.2591517315
## Taylor Hendricks 1.15461505 0.2239571117
## DeMarcus Cousins 4.42466023 -2.5278733794
## Gordon Hayward -0.68966882 0.8690154013
## Obi Toppin 3.01791562 0.9098400603
## Alec Burks -0.74320461 -2.0264013166
## Kentavious Caldwell-Pope -2.25315004 0.8810443210
## Zach Collins 2.23468578 2.2257336106
## Chet Holmgren 4.15750678 0.0605690899
## Taurean Prince -1.44792106 3.7467662602
## Ed Davis 3.31971345 3.1931177330
## Trey Lyles 2.26213434 2.3189123450
## Andre Drummond 5.95363392 0.5511351054
## Devin Booker -1.28579904 3.5063787159
## Joel Embiid 4.49751364 0.3052975967
## Frank Kaminsky -0.20730366 4.1181677131
## Jeremy Sochan 0.86229364 1.9652843807
## Kira Lewis Jr. -3.26278428 -0.0954317440
## Michael Carter-Williams -2.91272861 0.8684079705
## Chris Duarte -1.92568686 1.3515954340
## Bennedict Mathurin -1.07663046 1.0805356809
## Jahlil Okafor 5.47754196 -2.7772955807
## Ja Morant -2.49001353 -3.1186017067
## Dion Waiters -1.92316128 2.9143303356
## Paolo Banchero 0.71365922 -2.7656057765
## Elfrid Payton -2.00506164 -0.8904543832
## Bradley Beal -0.88415187 -0.6225122311
## Thomas Robinson 0.41276682 2.4752367530
## Jaxson Hayes 6.10602196 1.9333235932
## Tyrese Haliburton -2.28374305 2.7467761148
## Tyler Herro -1.81846173 0.4356215772
fviz_nbclust(df_cluster, kmeans, method = 'wss')
fviz_nbclust(df_cluster, kmeans, method = 'silhouette')
fviz_nbclust(df_cluster, kmeans, method = 'gap_stat')
k <- 15
df_cbb.kmeans <- kmeans(df_cluster, centers = k, nstart = 50)
df_cbb.kmeans2 <- kmeans(df_cluster, centers = 10, nstart = 50)
df_cbb.kmeans3 <- kmeans(df_cluster, centers = 5, nstart = 50)
df_cbb.kmeans
## K-means clustering with 15 clusters of sizes 14, 10, 10, 1, 13, 15, 15, 12, 8, 6, 8, 9, 24, 15, 5
##
## Cluster means:
## PC1 PC2
## 1 -3.0038961 1.6844905
## 2 -0.7038180 -2.2727211
## 3 5.5023843 -0.2136538
## 4 -6.6673891 -7.9875515
## 5 -3.5629264 -0.2578629
## 6 -2.3589000 -1.9212256
## 7 -1.3010475 2.9669816
## 8 2.1039278 1.5172029
## 9 3.3507603 3.4891410
## 10 -1.9553824 -4.6126423
## 11 0.5984250 2.9518339
## 12 5.0209873 -3.5834690
## 13 -0.9536002 0.3927310
## 14 1.6571399 -0.5010737
## 15 1.9371256 -2.9564573
##
## Clustering vector:
## Lonzo Ball Meyers Leonard Wendell Carter Jr.
## 13 9 14
## Derrick Williams Davion Mitchell Moses Moody
## 14 1 6
## Noah Vonleh Markieff Morris Joshua Primo
## 14 11 7
## Josh Jackson Willie Cauley-Stein De'Aaron Fox
## 15 9 2
## Brandon Miller Coby White Miles Bridges
## 6 5 13
## CJ McCollum Nerlens Noel Steven Adams
## 5 3 9
## Denzel Valentine Cody Zeller Zion Williamson
## 1 14 12
## Cameron Johnson Keegan Murray Kendall Marshall
## 7 8 1
## Collin Sexton Kyrie Irving Cade Cunningham
## 10 6 10
## Stanley Johnson Malik Monk Anthony Davis
## 2 6 3
## Markelle Fultz Julius Randle Jett Howard
## 10 15 1
## Mo Bamba Rui Hachimura Jalen Duren
## 3 11 3
## Jabari Smith Jr. Jaylen Brown Michael Kidd-Gilchrist
## 6 2 14
## Anthony Black Zach LaVine Jimmer Fredette
## 2 7 5
## Jordan Hawkins Wes Johnson Marvin Bagley III
## 1 14 12
## Aaron Gordon Romeo Langford RJ Barrett
## 14 2 10
## Cason Wallace Patrick Patterson Evan Turner
## 5 8 13
## Ekpe Udoh Jabari Parker Donovan Mitchell
## 15 15 1
## Dereck Lively II Buddy Hield Derrick Favors
## 9 1 3
## Ben McLemore Franz Wagner Onyeka Okongwu
## 13 7 12
## Austin Rivers Myles Turner D'Angelo Russell
## 6 8 6
## Harrison Barnes Isaac Okoro Patrick Williams
## 13 14 13
## Mikal Bridges Shai Gilgeous-Alexander Jarrett Culver
## 7 6 13
## Jayson Tatum Brandon Knight Lauri Markkanen
## 2 6 13
## Domantas Sabonis Andrew Wiggins Jamal Murray
## 8 2 6
## Paul George Xavier Henry Deandre Ayton
## 13 13 12
## Johnny Davis Gradey Dick Cameron Payne
## 13 13 5
## Ziaire Williams Otto Porter Jr. Scottie Barnes
## 13 11 13
## Ochai Agbaji Jeremy Lamb Jerome Robinson
## 7 7 5
## T.J. Warren Jaden Ivey Ben Simmons
## 14 13 12
## Damian Lillard P.J. Washington Trey Burke
## 5 13 5
## John Wall Luke Kennard Jalen Suggs
## 10 1 6
## Alex Len De'Andre Hunter Dennis Smith Jr.
## 9 7 10
## Brandon Ingram Marcus Smart Aaron Nesmith
## 6 6 1
## Doug McDermott Marcus Morris Jonathan Isaac
## 13 11 14
## Justise Winslow Evan Mobley Bam Adebayo
## 13 12 3
## Shabazz Muhammad Cole Aldrich Jaren Jackson Jr.
## 2 9 8
## Anthony Bennett Victor Oladipo Kris Dunn
## 14 7 5
## John Henson Karl-Anthony Towns Klay Thompson
## 9 8 5
## Nik Stauskas Kelly Olynyk Jakob Poeltl
## 1 11 8
## Devin Vassell Marquese Chriss Cam Reddish
## 7 14 5
## Kemba Walker Jalen Williams Al-Farouq Aminu
## 5 1 14
## Kevin Knox Anthony Edwards Greg Monroe
## 13 2 14
## Tristan Thompson James Bouknight Trae Young
## 12 13 4
## Terrence Ross Jalen Smith Jarace Walker
## 7 8 8
## Taylor Hendricks DeMarcus Cousins Gordon Hayward
## 14 12 13
## Obi Toppin Alec Burks Kentavious Caldwell-Pope
## 8 2 1
## Zach Collins Chet Holmgren Taurean Prince
## 8 3 7
## Ed Davis Trey Lyles Andre Drummond
## 9 8 3
## Devin Booker Joel Embiid Frank Kaminsky
## 7 3 11
## Jeremy Sochan Kira Lewis Jr. Michael Carter-Williams
## 11 5 1
## Chris Duarte Bennedict Mathurin Jahlil Okafor
## 1 13 12
## Ja Morant Dion Waiters Paolo Banchero
## 6 7 15
## Elfrid Payton Bradley Beal Thomas Robinson
## 6 13 11
## Jaxson Hayes Tyrese Haliburton Tyler Herro
## 3 7 13
##
## Within cluster sum of squares by cluster:
## [1] 11.914071 6.459357 14.072811 0.000000 8.126588 12.497657 9.151351
## [8] 6.825062 9.318869 9.067874 7.703117 21.895704 19.720030 14.760289
## [15] 4.467114
## (between_SS / total_SS = 93.0 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
fviz_cluster(df_cbb.kmeans, df_cluster, labelsize = 4, pointsize = 1, show.clust.cent = FALSE, repel = TRUE,
xlab = "Dimension 1", ylab = "Dimension 2", main = "Clustering with K-means") + scale_y_reverse()
cluster_assignments <- df_cbb.kmeans$cluster
cluster_df15 <- data.frame(value = cluster_assignments, name = names(cluster_assignments))
cluster_assignments2 <- df_cbb.kmeans2$cluster
cluster_df10 <- data.frame(value = cluster_assignments2, name = names(cluster_assignments2))
cluster_assignments3 <- df_cbb.kmeans3$cluster
cluster_df5 <- data.frame(value = cluster_assignments3, name = names(cluster_assignments3))
cluster_df15 <- as.tibble(cluster_df15)
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` instead.
## ℹ The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
cluster_df10 <- as.tibble(cluster_df10)
cluster_df5 <- as.tibble(cluster_df5)
cluster_df15 <- cluster_df15 |> rename(pc_cluster = value) |>
relocate(pc_cluster, .after = name)
cluster_df10 <- cluster_df10 |> rename(pc_cluster = value) |>
relocate(pc_cluster, .after = name)
cluster_df5 <- cluster_df5 |> rename(pc_cluster = value) |>
relocate(pc_cluster, .after = name)
cluster_df15
## # A tibble: 165 × 2
## name pc_cluster
## <chr> <int>
## 1 Lonzo Ball 13
## 2 Meyers Leonard 9
## 3 Wendell Carter Jr. 14
## 4 Derrick Williams 14
## 5 Davion Mitchell 1
## 6 Moses Moody 6
## 7 Noah Vonleh 14
## 8 Markieff Morris 11
## 9 Joshua Primo 7
## 10 Josh Jackson 15
## # ℹ 155 more rows
df_cbb.sample$group <- df_cbb.kmeans$cluster
df_cbb.sample <- df_cbb.sample |> relocate(group, .before = dunk_made)
head(df_cbb.sample)
## group dunk_made dunk_attempts dunk_pct rim_made
## Lonzo Ball 13 1.02777778 1.11111111 0.925 2.611111
## Meyers Leonard 9 0.78461538 0.86153846 0.911 1.676923
## Wendell Carter Jr. 14 1.51351351 1.56756757 0.966 3.243243
## Derrick Williams 14 0.81159420 0.86956522 0.933 1.956522
## Davion Mitchell 1 0.03333333 0.03333333 1.000 1.166667
## Moses Moody 6 0.31250000 0.34375000 0.909 1.937500
## rim_attempts rim_pct rim_asted other2pt_made
## Lonzo Ball 3.333333 0.783 0.511 0.4166667
## Meyers Leonard 2.215385 0.757 0.798 0.9230769
## Wendell Carter Jr. 4.729730 0.686 0.608 1.0810811
## Derrick Williams 2.724638 0.718 0.585 0.7101449
## Davion Mitchell 1.850000 0.631 0.186 0.4166667
## Moses Moody 3.437500 0.564 0.371 1.4062500
## other2pt_attempts other2pt_pct other2pt_asted fg2_pct
## Lonzo Ball 0.8055556 0.517 0.200 0.732
## Meyers Leonard 2.0923077 0.441 0.500 0.585
## Wendell Carter Jr. 2.6216216 0.412 0.500 0.586
## Derrick Williams 1.7101449 0.415 0.388 0.598
## Davion Mitchell 0.9500000 0.439 0.080 0.519
## Moses Moody 3.5625000 0.395 0.311 0.478
## fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g games ft_per_g
## Lonzo Ball 2.2 5.4 0.738 0.4074074 36 1.8
## Meyers Leonard 0.0 0.2 1.000 0.0000000 65 1.6
## Wendell Carter Jr. 0.5 1.2 0.947 0.4166667 37 3.4
## Derrick Williams 0.7 1.3 0.929 0.5384615 69 5.9
## Davion Mitchell 1.6 4.1 0.603 0.3902439 60 1.6
## Moses Moody 1.8 5.1 0.897 0.3529412 32 4.7
## fta_per_g ast_per_g_college orb_per_g drb_per_g stl_per_g
## Lonzo Ball 2.7 7.6 0.9 5.1 1.8
## Meyers Leonard 2.2 0.7 1.3 3.4 0.3
## Wendell Carter Jr. 4.5 2.0 2.9 6.1 0.8
## Derrick Williams 8.2 0.9 2.5 5.2 0.8
## Davion Mitchell 2.4 4.7 0.4 2.3 1.7
## Moses Moody 5.8 1.6 2.0 3.8 1.0
## blk_per_g tov_per_g pts_per_g_college
## Lonzo Ball 0.8 2.5 14.6
## Meyers Leonard 1.1 1.4 7.7
## Wendell Carter Jr. 2.1 2.0 13.5
## Derrick Williams 0.7 2.3 17.8
## Davion Mitchell 0.4 2.3 12.0
## Moses Moody 0.7 1.6 16.8
df_cbb.sample |> group_by(group) |>
summarize(across(everything(), mean)) |> print(n = 15, width = Inf)
## # A tibble: 15 × 27
## group dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.154 0.176 0.816 0.899 1.44 0.623
## 2 2 0.620 0.683 0.904 2.60 4.07 0.639
## 3 3 2.06 2.16 0.959 3.51 4.49 0.787
## 4 4 0 0 0 3.28 6.28 0.522
## 5 5 0.126 0.142 0.904 1.08 1.75 0.615
## 6 6 0.318 0.348 0.862 2.09 3.38 0.626
## 7 7 0.271 0.301 0.914 1.04 1.55 0.682
## 8 8 0.728 0.781 0.925 2.10 2.86 0.738
## 9 9 0.784 0.833 0.943 1.49 2.04 0.745
## 10 10 0.600 0.657 0.868 3.20 5.16 0.620
## 11 11 0.380 0.431 0.906 1.34 1.86 0.727
## 12 12 2.02 2.14 0.943 4.93 6.48 0.760
## 13 13 0.487 0.536 0.913 1.69 2.53 0.670
## 14 14 0.978 1.07 0.921 2.65 3.88 0.682
## 15 15 1.24 1.36 0.914 3.20 4.78 0.675
## rim_asted other2pt_made other2pt_attempts other2pt_pct other2pt_asted fg2_pct
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.288 0.590 1.47 0.401 0.168 0.496
## 2 0.358 1.52 4.21 0.360 0.200 0.502
## 3 0.634 0.905 2.59 0.357 0.469 0.630
## 4 0.114 1.19 2.78 0.427 0.026 0.493
## 5 0.232 0.850 2.20 0.383 0.194 0.480
## 6 0.272 1.24 3.32 0.368 0.138 0.499
## 7 0.468 0.616 1.50 0.397 0.296 0.546
## 8 0.576 0.894 2.16 0.414 0.503 0.6
## 9 0.705 0.636 1.65 0.351 0.633 0.581
## 10 0.247 1.80 4.82 0.362 0.0985 0.501
## 11 0.589 0.717 1.69 0.432 0.437 0.581
## 12 0.539 1.63 3.99 0.415 0.369 0.622
## 13 0.429 0.916 2.44 0.371 0.282 0.528
## 14 0.502 0.989 2.65 0.364 0.403 0.564
## 15 0.420 1.98 5.22 0.376 0.292 0.52
## fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1.85 4.91 0.724 0.374 73.5 2.37 3.01
## 2 1.13 3.53 0.750 0.320 37.2 4.32 5.79
## 3 0.19 0.58 0.364 0.0888 32.3 2.62 4.11
## 4 3.7 10.3 0.263 0.359 32 7.4 8.6
## 5 1.9 5.18 0.634 0.365 81.4 3.55 4.45
## 6 1.87 4.9 0.667 0.375 41.8 4.04 5.22
## 7 1.4 3.69 0.870 0.379 73.3 1.75 2.27
## 8 0.508 1.43 0.807 0.345 54.5 2.72 3.7
## 9 0.0125 0.125 0.375 0.0312 72.1 1.62 2.7
## 10 1.72 4.8 0.546 0.355 32 4.72 6.43
## 11 0.45 1.31 0.935 0.414 95.5 2.31 3.35
## 12 0.233 0.733 0.624 0.149 34.1 4.01 6.38
## 13 1.47 3.97 0.809 0.366 53.9 2.98 3.88
## 14 0.673 1.88 0.834 0.325 45.4 3.43 4.9
## 15 0.7 2.02 0.921 0.314 37 3.82 5.5
## ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g tov_per_g
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2.96 0.85 3.24 1.19 0.414 1.77
## 2 2.41 1.56 4.03 1.23 0.58 2.43
## 3 1.05 2.73 5.84 0.98 2.97 1.75
## 4 8.7 0.4 3.5 1.7 0.3 5.2
## 5 3.95 0.715 3.34 1.55 0.415 2.65
## 6 3.67 1.17 4.1 1.37 0.553 2.58
## 7 1.76 1.06 2.99 1.06 0.493 1.35
## 8 1.17 2.17 4.9 0.633 1.64 1.54
## 9 0.8 2.28 4.39 0.538 2.09 1.26
## 10 5 1.03 4.32 1.43 0.567 3.43
## 11 1.32 1.79 4.08 0.812 0.712 1.39
## 12 1.9 3.49 6.14 1.13 1.84 2.3
## 13 2.28 1.36 4.38 1.10 0.608 2.07
## 14 1.53 2.47 5.08 1.07 1.29 2.04
## 15 2.3 2.82 6 1.04 1.54 2.48
## pts_per_g_college
## <dbl>
## 1 13.0
## 2 16.4
## 3 12.0
## 4 27.4
## 5 16.6
## 6 17.2
## 7 11.1
## 8 12.5
## 9 8.34
## 10 20.0
## 11 10.5
## 12 17.9
## 13 14.4
## 14 14.4
## 15 16.3
df_cbb.sample |> group_by(group) |>
summarize(across(everything(), median))
## # A tibble: 15 × 27
## group dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.140 0.202 0.904 0.943 1.43 0.627
## 2 2 0.619 0.723 0.923 2.68 4.07 0.641
## 3 3 2.24 2.31 0.958 3.63 4.56 0.776
## 4 4 0 0 0 3.28 6.28 0.522
## 5 5 0.123 0.123 1 1.04 1.65 0.609
## 6 6 0.3 0.333 0.903 2 3.25 0.634
## 7 7 0.302 0.32 0.902 1.08 1.52 0.692
## 8 8 0.718 0.767 0.932 2.03 2.85 0.728
## 9 9 0.721 0.783 0.954 1.34 1.78 0.741
## 10 10 0.624 0.644 0.925 3.13 4.98 0.629
## 11 11 0.358 0.381 0.926 1.34 1.82 0.735
## 12 12 1.94 2.08 0.951 4.82 6.64 0.762
## 13 13 0.473 0.520 0.907 1.69 2.59 0.663
## 14 14 0.85 0.9 0.933 2.59 3.69 0.686
## 15 15 1.05 1.13 0.925 3.3 4.92 0.676
## # ℹ 20 more variables: rim_asted <dbl>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <dbl>, other2pt_asted <dbl>,
## # fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_asted <dbl>,
## # fg3_pct_per_g <dbl>, games <dbl>, ft_per_g <dbl>, fta_per_g <dbl>,
## # ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, stl_per_g <dbl>,
## # blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>
df_career_stats |> filter(player == "Jaylen Brown") |>
select(player, pick_overall, year, pts_per_g, trb_per_g, ast_per_g, vorp, g)
## # A tibble: 1 × 8
## player pick_overall year pts_per_g trb_per_g ast_per_g vorp g
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Jaylen Brown 3 2016 18.6 5.3 2.4 9.7 540
df_career_stats |> filter(year >= 2010)|>
group_by(pick_overall) |>
summarize(across(everything(), function(x) mean(x))) |>
select(pick_overall, pts_per_g, trb_per_g, ast_per_g, vorp, g)
## Warning: There were 122 warnings in `summarize()`.
## The first warning was:
## ℹ In argument: `across(everything(), function(x) mean(x))`.
## ℹ In group 1: `pick_overall = 1`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 121 remaining warnings.
## # A tibble: 61 × 6
## pick_overall pts_per_g trb_per_g ast_per_g vorp g
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 18.9 6.65 4.28 14.2 380.
## 2 2 14.7 5.14 3.19 3.91 338.
## 3 3 17.4 6.58 3.31 12.0 420.
## 4 4 12 5.32 1.91 3 372.
## 5 5 12.5 4.61 3.36 4.84 377.
## 6 6 9.92 4.5 2.17 6.57 346.
## 7 7 12.4 5.08 2.51 3.77 430.
## 8 8 9.21 3.31 1.91 1.71 390.
## 9 9 10.3 4.76 2.31 5.61 418.
## 10 10 9.74 3.46 2.13 5.14 370.
## # ℹ 51 more rows
df_cluster2 <- df_cbb_scaled
df_cluster2$name <- rownames(df_cbb)
df_cluster2 <- df_cluster2 |> relocate(name, .before = dunk_made)
df_cluster2 <- column_to_rownames(df_cluster2, var = "name")
k1 <- 15
k2 <- 10
k3 <- 5
k15 <- kmeans(df_cluster2, centers = k1, nstart = 50)
k10 <- kmeans(df_cluster2, centers = k2, nstart = 50)
k5 <- kmeans(df_cluster2, centers = k3, nstart = 50)
temp_assign <- k15$cluster
combine <- function(df, kmeans) {
temp_assign <- kmeans$cluster
df_temp <- as.tibble(data.frame(value = temp_assign, name = names(temp_assign)))
if(!("all_cluster" %in% colnames(df_temp))) {
df_temp <- df_temp |> rename(all_cluster = value)
}
cluster_df <- left_join(df, df_temp, by = c("name"))
cluster_df <- cluster_df |> relocate(pc_cluster, .after = all_cluster)
# cluster_df <- cluster_df |> group_by(all_cluster, pc_cluster) |> summarize(n = n())
# return(cluster_df)
return(cluster_df)
}
groups15 <- combine(cluster_df15, k15) |> arrange(pc_cluster, all_cluster)
groups10 <- combine(cluster_df10, k10)
groups5 <- combine(cluster_df5, k5)
groups15
## # A tibble: 165 × 3
## name all_cluster pc_cluster
## <chr> <int> <int>
## 1 Michael Carter-Williams 1 1
## 2 Kendall Marshall 5 1
## 3 Denzel Valentine 8 1
## 4 Jett Howard 8 1
## 5 Jordan Hawkins 8 1
## 6 Donovan Mitchell 8 1
## 7 Buddy Hield 8 1
## 8 Aaron Nesmith 8 1
## 9 Nik Stauskas 8 1
## 10 Jalen Williams 8 1
## # ℹ 155 more rows
df_cbb <- df_cbb |> rownames_to_column(var = "name")
df_groups <- left_join(df_cbb, groups15, by = c("name"))
df_cbb <- df_cbb |> column_to_rownames(var = "name")
library(ggforce)
df_groups <- df_groups |>
mutate(bust = if_else(name %in% bust_list, 1, 0),
good = if_else(name %in% good_list, 1, 0))
df_groups |> group_by(pc_cluster) |>
summarize(n = n(),
across(everything(), mean)) |>
select(-name)
## Warning: There were 15 warnings in `summarize()`.
## The first warning was:
## ℹ In argument: `across(everything(), mean)`.
## ℹ In group 1: `pc_cluster = 1`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 14 remaining warnings.
## # A tibble: 15 × 31
## pc_cluster n dunk_made dunk_attempts dunk_pct rim_made rim_attempts
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 14 0.154 0.176 0.816 0.899 1.44
## 2 2 10 0.620 0.683 0.904 2.60 4.07
## 3 3 10 2.06 2.16 0.959 3.51 4.49
## 4 4 1 0 0 0 3.28 6.28
## 5 5 13 0.126 0.142 0.904 1.08 1.75
## 6 6 15 0.318 0.348 0.862 2.09 3.38
## 7 7 15 0.271 0.301 0.914 1.04 1.55
## 8 8 12 0.728 0.781 0.925 2.10 2.86
## 9 9 8 0.784 0.833 0.943 1.49 2.04
## 10 10 6 0.600 0.657 0.868 3.20 5.16
## 11 11 8 0.380 0.431 0.906 1.34 1.86
## 12 12 9 2.02 2.14 0.943 4.93 6.48
## 13 13 24 0.487 0.536 0.913 1.69 2.53
## 14 14 15 0.978 1.07 0.921 2.65 3.88
## 15 15 5 1.24 1.36 0.914 3.20 4.78
## # ℹ 24 more variables: rim_pct <dbl>, rim_asted <dbl>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <dbl>, other2pt_asted <dbl>,
## # fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_asted <dbl>,
## # fg3_pct_per_g <dbl>, games <dbl>, ft_per_g <dbl>, fta_per_g <dbl>,
## # ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, stl_per_g <dbl>,
## # blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>,
## # all_cluster <dbl>, bust <dbl>, good <dbl>
df_groups |> group_by(all_cluster) |>
summarize(n = n(),
across(everything(), mean)) |>
select(-name)
## Warning: There were 15 warnings in `summarize()`.
## The first warning was:
## ℹ In argument: `across(everything(), mean)`.
## ℹ In group 1: `all_cluster = 1`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 14 remaining warnings.
## # A tibble: 15 × 31
## all_cluster n dunk_made dunk_attempts dunk_pct rim_made rim_attempts
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 15 0.433 0.481 0.902 1.98 3.01
## 2 2 9 0.543 0.589 0.896 3.14 5.06
## 3 3 3 2.12 2.17 0.979 3.57 4.34
## 4 4 22 0.684 0.743 0.920 1.88 2.70
## 5 5 2 0 0 0 1.42 2.14
## 6 6 1 0 0 0 3.28 6.28
## 7 7 4 2.30 2.44 0.942 5.86 7.67
## 8 8 22 0.263 0.294 0.887 1.10 1.78
## 9 9 14 1.28 1.39 0.925 3.13 4.45
## 10 10 7 0.411 0.445 0.934 1.88 2.72
## 11 11 10 1.96 2.09 0.941 3.73 5.00
## 12 12 12 0.0783 0.0854 0.932 1.11 1.83
## 13 13 14 0.376 0.430 0.896 1.23 1.72
## 14 14 19 0.466 0.512 0.907 1.98 3.13
## 15 15 11 0.780 0.823 0.947 1.78 2.40
## # ℹ 24 more variables: rim_pct <dbl>, rim_asted <dbl>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <dbl>, other2pt_asted <dbl>,
## # fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_asted <dbl>,
## # fg3_pct_per_g <dbl>, games <dbl>, ft_per_g <dbl>, fta_per_g <dbl>,
## # ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, stl_per_g <dbl>,
## # blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>,
## # pc_cluster <dbl>, bust <dbl>, good <dbl>
df_pc <- left_join(df_cluster |> rownames_to_column("name"), groups15, by = "name")
df_pc <- left_join(df_pc, df_groups |> select(name, bust, good), by = "name")
df_pc <- df_pc |> column_to_rownames("name")
df_pc <- df_pc |>
mutate(
pc_cluster = as.factor(pc_cluster),
all_cluster = as.factor(all_cluster)
)
hulls <- df_pc |>
group_by(pc_cluster) |>
slice(chull(PC1, PC2))
p <- ggplot(data = df_pc, aes(x = PC1, y = PC2, color = pc_cluster)) +
geom_polygon(data = hulls,
aes(group = pc_cluster, color = pc_cluster, fill = pc_cluster),
alpha = 0.2) +
geom_point(data = df_pc |> filter(bust == 1), size = 3, color = "black", shape = 10, show.legend = FALSE) +
geom_point(data = df_pc |> filter(good == 1), size = 3, color = "black", shape = 5, show.legend = FALSE) +
geom_point() +
scale_y_reverse() +
scale_y_reverse() +
theme_minimal() +
labs(title = "Clusters with Bad and Good Value players",
x = "Dimension 1",
y = "Dimension 2",
color = "Cluster",
fill = "Cluster"
)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p
p2 <- p +
geom_label_repel(data = df_pc |> rownames_to_column(var = "name") |> filter(bust == 1 | good == 1),
aes(label = name),
size = 1.6,
max.overlaps = 20,
fill = NA,
label.size = NA,
segment.size = 0.2
)
p2